Commented R code for analyses described in Brock et al. PiPG Progress reports
This document shows how to perform the analyses described in Brock et al. largely using the data from Lee et al. (2018) and Perry et al. (2010). It is not a detailed description of the methods themselves (for that see references in the main document) but rather an example of how to conduct the sorts of analyses mentioned in the article. Knitting it from scratch may take some time - there are some analyses that may take hours (but these are cached and so the knitting process will look for saved versions before doing an analysis).
First we’ll load some packages necessary for the wrangling, visualisation, and analyses.
# load pacman package + install if missing - used to load/install multiple packages at once
if (!require("pacman")) install.packages("pacman")
# load packages + install any that are missing
# GP - I ma having linking issues in nix with mvabund...
pacman::p_load(tidyverse, broom, knitr, janitor, ggfortify, patchwork,
mdthemes, vegan, analogue, ggvegan, Rtsne, umap, ecotraj,
coenoflex, indicspecies, devtools)
# install packages from GitHub (not available on CRAN) - this will check, and only install if reqd
devtools::install_github("phytomosaic/ecole")
devtools::install_github("phytomosaic/fitNMDS")
devtools::install_github("jfq3/ggordiplots")
devtools::install_github("pmartinezarbizu/pairwiseAdonis/pairwiseAdonis")
devtools::install_github("gavinsimpson/ggvegan")
# load newly installed packages from GitHub
pacman::p_load(ecole, fitNMDS, ggordiplots, pairwiseAdonis, ggvegan)
# load custom functions for plotting ordinations from vegan
source("auxScripts/helperPlots.r")
Load data we’ll explore and do some re-organising. We need to be a bit cautious because although base R and many multivariate analysis packages (e.g., vegan) use row names, the tidyverse (tibbles) does not as they don’t confirm to tidy data. Need to be aware of this when moving between the two.
First, load freshwater invertebrate, physiochemical and location data from Auckland, New Zealand from Lee et al. (2018). ## @ Finn - maybe add 2-3 sentences with some context for this work?
# Load data and tidy up
# invert abundances
inverts <- read.csv(file = "data/inverts_to_species.csv", row.names = 1)
# water chem at some sites
chem <- read.csv(file = "data/physicochemical.csv", row.names = 1)
# location data (WGS84 coords)
locations <- read.csv(file = "data/Site_Coords_WGS84.csv", row.names = 1)
invert_sites <- rownames(inverts)
chem_sites <- rownames(chem)
# standardise data
inverts_pa <- decostand(inverts, method = "pa") # Presence-absence
inverts_l10 <- log10(inverts + 1) # log-10 transformed
inverts_hel <- decostand(inverts, method = "hel") # Hellinger transformed
# Get a subset of the water chemistry data
chem <- janitor::clean_names(chem)
# Only keep sit4s with invert data and remove some less useful variables
chem_sub <- chem[chem_sites %in% invert_sites, ]
chem_sub <- select(chem_sub, -order, -land_form, -taxa, -qmci,
-mci, -ept, -proportion_ept, -gambusia)
# Subset of chemical data for hypothesis testing
chem_sub_hyp <- chem_sub %>%
select(do, temperature, velocity, proportion_silt, proportion_macrophyte,
proportion_riparian_cover20, pasture, din)
# Binary vectors for riparian cover and native cover
rip_cover <- chem_sub$proportion_riparian_cover0_5 > 0.2
native_cover <- chem_sub$native > 0.2
Second, load vegetation data from Aotea/Great Barrier Island (New Zealand) from Perry et al. (2010). These data are collected from geographic locations across Aotea using a plotless method of vegetation survey. The geographic locations span a succesional sequence from post-fire recovery to mature secondary forest.
# Load the Aotea veg data
# vegan uses row names, but the tidyverse doesn"t!
aotea <- read.csv(file = "Data/aotea_allyears_tidy.csv", row.names = 1)
aotea_pa <- decostand(aotea, method = "pa") # convert to presence-absence
aotea_pa_nosing <- aotea_pa[, colSums(aotea_pa) > 1] # PA without any singletons
# Make the site labels more useful
dfr <- data.frame(site = rownames(aotea_pa))
aotea_site <- separate(dfr, site, into = "code", "_") # ignore the warning
aotea_site$code <- str_sub(aotea_site$code, end = 2)
rm(dfr)
Now, some basic visualisation to highlight the nature of multivariate (ecological) data using the Lee et al. data. This is Figure X in the main document.
# First, lengthen the data to make it easier for ggplot
aotea_long <- aotea %>%
rownames_to_column(var = "site") %>%
pivot_longer(cols = -site, names_to = "taxa", values_to = "abund")
inverts_long <- inverts %>%
rownames_to_column(var = "site") %>%
pivot_longer(cols = -site, names_to = "taxa", values_to = "abund")
# Histogram of number of sites each spp occurs at for Lee et al.
n_sites_iv <- data.frame(n = colSums(inverts > 0))
sites_hist_iv_gg <- ggplot(n_sites_iv) +
geom_histogram(aes(x = n), binwidth = 1) +
labs(x = "No. of sites", y = "Frequency") +
theme_minimal()
# A heat map of abundance for species x site for Lee et al. data
inverts_long$taxa_num <- as.numeric(factor(inverts_long$taxa,
levels = unique(inverts_long$taxa)))
heat_iv_gg <- ggplot(inverts_long, aes(x = site, y = taxa_num)) +
geom_raster(aes(fill = log10(abund)), show.legend = FALSE) +
scale_fill_gradient(low="grey99", high="red") +
labs(x = "Site", y = "Taxa") +
theme_minimal() +
theme(axis.text.x=element_blank(),
axis.ticks.x=element_blank())
# A dotplot to replicate that in Wang et al. 2012 Methods Ecol Evol
wang_iv <- data.frame(x = as.vector(as.matrix(inverts)),
y = rep(1:77, each = 29),
col = chem_sub$native > 0.4)
dot_wang_iv_gg <- ggplot(data = wang_iv) +
geom_point(aes(x = log10(x+1), y = y, col = col),
alpha = 0.8, size = 2, show.legend = FALSE) +
scale_color_brewer(type = "qual") +
labs(y= "Taxa", x = "Abundance (log<sub>10</sub>[x+1])") +
theme(axis.text.x=element_blank(),
axis.ticks.x=element_blank()) +
md_theme_minimal() # mdthemes lets us use markdown in graph notations
# combine plots
fig1 <- sites_hist_iv_gg + heat_iv_gg + dot_wang_iv_gg +
plot_annotation(tag_levels = "a")
Histogram of number of sites each taxa occurs at (a). Heat map of abundance for species x site, grey = not present, white = low abundances and red = high abundnces (b). Dotplot of taxa abundances, replicating that in Wang et al. 2012 Methods Ecol Evol (c). All figures use the Lee et al. 2018 Invertebrate data
Next, look at the mean-variance relationship in the two datasets - this is of concern in tests of location and dispersion, and adequately addressing it is central to model-based multivariate analysis (Warton et al. (2015)).
# freshwater invert data
# calculate mean and variance
inverts_mv <- data.frame(mean = colMeans(inverts),
var = apply(inverts , 2, var))
# plot
invert_mean_gg <- ggplot(data = inverts_mv) +
geom_point(aes(x = log10(mean), y = log10(var)), alpha = 0.8, size = 2) +
labs(y = "Variance (log<sub>10</sub> scale)",
x = "Mean (log<sub>10</sub> scale)") +
md_theme_bw()
# Aotea vegetation data
# calculate mean and varaince
aotea_mv <- data.frame(mean = colMeans(aotea_pa),
var = apply(aotea_pa , 2, var))
# plot
veg_mean_gg <- ggplot(data = aotea_mv) +
geom_point(aes(x = log10(mean), y = log10(var)), alpha = 0.8, size = 2) +
labs(y = "Variance (log<sub>10</sub> scale)",
x = "Mean (log<sub>10</sub> scale)") +
md_theme_bw()
# combine plots
fig2 <- invert_mean_gg + veg_mean_gg +
plot_annotation(tag_levels = "a")
Mean-variance relationships for Lee et al. 2018 invertebrate (a) and Perry et al 2010 (b) vegetation data.
Having visualised the data, we can look at some ways to explore it using multidimensional scaling as a means of dimensional reduction – these methods are available in the vegan package, which is extremely well documented and support by a series of vignettes. Here, we are using the methods in an exploratory way, to generate hypotheses.
First, classical/metric MDS of the Lee et al. (2018) invertebrate data. In vegan we can do this either unweighted or weighted; below we do both with row sums as proportions of matrix sum as weights. Also, because we use a non-Euclidean distance metric there is a risk of negative eigenvalues, so we use the add argument. Using eig = TRUE returns the eigenvalues for us. Here, the weighted solution is strongly influence by a few sites with much higher abundance than others (e.g., F16 and F01).
# Non-weighted using the Bray_Curtis
pcoa_iv <- wcmdscale(d = vegdist(inverts), eig = TRUE, add = "lingoes")
# get the weights (row sums)
wgt_iv <- iv <- rowSums(inverts)/sum(inverts)
# Weighted
pcoa_wgt_iv <- wcmdscale(d = vegdist(inverts), w = wgt_iv, eig = TRUE, add = "lingoes")
# Get the coordinates (using vegan::scores)
pcoa_scores <- data.frame(scores(pcoa_iv)[,1:2],
scores(pcoa_wgt_iv)[,1:2],
site = rownames(inverts),
w = wgt_iv) %>%
rename(Dim1_wgt = Dim1.1, Dim2_wgt = Dim2.1)
# Get the eigenvalues and their proportional value
pcoa_eig <- data.frame(eig = pcoa_iv$eig[1:10],
expl = round(pcoa_iv$eig[1:10] / sum(pcoa_iv$eig[1:10]) * 100, 2))
pcoa_wgt_eig <- data.frame(eig = pcoa_wgt_iv$eig[1:10],
expl = round(pcoa_wgt_iv$eig[1:10] / sum(pcoa_wgt_iv$eig[1:10]) * 100, 2))
# Make some axis labels
xl <- paste("Dimension 1 (",pcoa_eig$expl[1], "%)", sep = "")
yl <- paste("Dimension 2 (", pcoa_eig$expl[2], "%)", sep = "")
xl_w <- paste("Dimension 1 (",pcoa_wgt_eig$expl[1], "%)", sep = "")
yl_w <- paste("Dimension 2 (", pcoa_wgt_eig$expl[2], "%)", sep = "")
## unweighted plot
pcoa_gg <- ggplot(pcoa_scores) +
geom_text(aes(x = Dim1, y = Dim2, label = site), size = 3) +
labs(x = xl,
y = yl) +
coord_equal() +
theme_bw()
# eigenvalues plot
pcoa_eig_gg <- ggplot(pcoa_eig) +
geom_bar(aes(x = as.factor(1:10), y = expl),
stat = "identity") +
labs(x = "Dimension",
y = "Eigenvalue (% total)") +
theme_bw()
## weighted plot
pcoa_wgt_gg <- ggplot(pcoa_scores) +
geom_text(aes(x = Dim1_wgt, y = Dim2_wgt, label = site, size = w)) +
scale_size_continuous(range = c(1, 4)) +
labs(x = xl_w,
y = yl_w,
size = "weight") +
coord_equal() +
theme_bw()
# eigenvalues plot
pcoa_eig_wgt_gg <- ggplot(pcoa_wgt_eig) +
geom_bar(aes(x = as.factor(1:10), y = expl), stat = "identity") +
labs(x = "Dimension",
y = "Eigenvalue (% total)") +
theme_bw()
# gather plots
fig3 <- (pcoa_gg + pcoa_eig_gg + pcoa_wgt_gg + pcoa_eig_wgt_gg) +
plot_layout(widths = c(1, 1)) +
plot_annotation(tag_levels = 'a') &
theme(legend.position = 'bottom')
Unweighted metric MDS (a) and associated eigenvalues (b), weighted metric MDS (c) and associated eigenvalues (d) for Lee et al. 2018 Invertebrate data
Second, look at non-metric multidimensional scaling. Remember that although nMDS and PCOA are both called ‘multidimensional scaling’ they are quite different in that nMDS works on ranks and is based on a stochastic (non-analytic) algorithm. We’ll start with the Lee et al. invertebrate data showing the nMDS and then fitting environmental vectors on it (commands vegan::metaMDS and vegan::envfit, respectively).
# nMDS of the Lee et al. invertebrate data
# Bray-curtis distance matrix
invert_dist <- vegdist(inverts)
# Turn off automatic rescaling. Trace controls output verbosity
invert_mds <- metaMDS(invert_dist, autotransform = FALSE, trace = 1)
# Make the plots (using custom plot.mds.gg function from helperPlots.r)
iv_mds_gg <- plot.mds.gg(invert_mds, txt.x = -0.35,
txt.y = -0.35,labels = TRUE,
msl = 0.25)
# nMDS with points scaled in size by abundance of _Physa_
wgt <- inverts$Physa / 10
iv_bubble_gg <- plot.mds.bubble.gg(invert_mds, weights = wgt)
# Fit the chemical data as environmental vectors (envfit) and plot
iv_chem_fit <- envfit(invert_mds, chem_sub_hyp)
iv_fit_gg <- plot.envfit.gg(invert_mds, iv_chem_fit, p.max = .2,
clusters = native_cover)
# Or we could synthesis all the chem data with a PCA and fit those axes
chem_pca <- princomp(chem_sub, cor = TRUE)
chem_pca_scores <- scores(chem_pca)
iv_pca_fit <- envfit(invert_mds, chem_pca_scores[,1:8])
iv_fitpca_gg <- plot.envfit.gg(invert_mds,
iv_pca_fit,
p.max = .1,
clusters = native_cover)
# gather plots
fig4 <- (iv_mds_gg + iv_bubble_gg + iv_fit_gg + iv_fitpca_gg) +
plot_annotation(tag_levels = "a") +
plot_layout(ncol = 2, guides ="collect") &
theme(legend.position = "bottom")
Visually, there are no strong and obvious clusters in ordination space suggesting the sites lie along an environmental gradient (rather than discrete environmental conditions). PCA shows that there is a dominant axis relating to land-use (proportion forest, etc.) and a second axis relating to stream conditions (e.g. width and flow velocity) and water quality. The vectors suggest that Dissolved organic nitrogen (DIN) and water temperature are the most important environmental vectors (p < 0.05), and the PCA scores components 2, 5 and 6 are important (but only DIN p < 0.05).
nMDS of invertebrate data (a), with point size scale by Physa (genus snails) abundance (b), and physiocemical data fit as environmental vectors (c), and with environmetal vectors fit using PCA axis of all physiochemical variables (d)
One issue with fitting vectors is that they may not depict non-linear relationships adequately (although they have the benefit of allowing multiple variables to be seen). An option is to fit a surface (e.g. via a spline or general additive model [GAM]) instead. Here, we fit DIN and the first component of the PCA to the invertebrate nMDS. As with any surface fitting exercise it is important to evaluate the effects of different parametrisations (e.g., the knots used in the GAM) via the underlying diagnostics.
par(mfrow = c(2,2))
din_surf_k5 <- ordisurf(x = invert_mds, y = chem_sub$din,
knots = 5, main = "DIN, k = 5")
pca_surf_k5 <- ordisurf(x = invert_mds, y = chem_pca_scores[,1],
knots = 5, main = "PCA comp 1, k = 5")
din_surf_k10 <- ordisurf(x = invert_mds, y = chem_sub$din,
knots = 10, main = "DIN, k = 10")
pca_surf_k10 <- ordisurf(x = invert_mds, y = chem_pca_scores[,1],
knots = 10, main = "PCA comp 1, k = 10")
Dissolved organic nitrogen fit as a surface to the nMDS with different paramertisations of k (a, c). PCA component 1 fit as a surface to the nMDS with different paramertisations of k (b, d).
Now we’ll do an nMDS of the Aotea vegetation data. Unlike the Lee et al. invertebrate data, this information is presence-absence only, and covers many more sites and species (895 x 273). Although it may seem obvious that presence-absence is less informative than abundance data, this is context-dependent. Wilson (2012) suggests that abundance data are more informative that presence-absence data (in ordinations) only where small extents are covered, the vegetation is reasonably homogeneous, and the abundance information is high-quality. This example may have some issues with finding a convergent solution - check the vegan help for details.
Shepperd plot (a), nMDS ordination (b) of vegetation data. nMDS ordination of of vegetation data with point size scaled by the abundance of the late successional tree, Beilschmiedia tawa (c)
nMDS ordination of of vegetation data with point size scaled by the abundance of the late successional tree, Beilschmiedia tawa
Principle response curves seek to fit a smooth curve though some high dimensional space. They are probably most appropriate where there is a single dominant gradient (in time or space) (De’ath, 1999; Simpson & Birks, 2012). Here, we will apply it to the Lee et al. data, which is underpinned by a strong gradient in land-use, using the implementation in analogue::prcurve. The initial configuration can be quite important (see De’ath (1999)), so here we use the first axis of a correspondence analysis, with a GAM, and allow the complexity of the smoother to vary across species. We can show the fit of the curve at each iteration by setting plotit = TRUE.
# Fit PRC here with the complexity fixed across all species (vary)
# The method here specifies the starting conditions (first axis of a method)
# Here we use a GAM as the smoother (slower than spline)
inverts_pc <- prcurve(inverts_l10, method = "ca", trace = TRUE,
vary = TRUE, smoother = smoothGAM,
penalty = 1.4)
##
## Determining initial DFs for each variable...
##
|
| | 0%
|
|= | 1%
|
|== | 3%
|
|=== | 4%
|
|==== | 5%
|
|===== | 6%
|
|===== | 8%
|
|====== | 9%
|
|======= | 10%
|
|======== | 12%
|
|========= | 13%
|
|========== | 14%
|
|=========== | 16%
|
|============ | 17%
|
|============= | 18%
|
|============== | 19%
|
|=============== | 21%
|
|=============== | 22%
|
|================ | 23%
|
|================= | 25%
|
|================== | 26%
|
|=================== | 27%
|
|==================== | 29%
|
|===================== | 30%
|
|====================== | 31%
|
|======================= | 32%
|
|======================== | 34%
|
|========================= | 35%
|
|========================= | 36%
|
|========================== | 38%
|
|=========================== | 39%
|
|============================ | 40%
|
|============================= | 42%
|
|============================== | 43%
|
|=============================== | 44%
|
|================================ | 45%
|
|================================= | 47%
|
|================================== | 48%
|
|=================================== | 49%
|
|=================================== | 51%
|
|==================================== | 52%
|
|===================================== | 53%
|
|====================================== | 55%
|
|======================================= | 56%
|
|======================================== | 57%
|
|========================================= | 58%
|
|========================================== | 60%
|
|=========================================== | 61%
|
|============================================ | 62%
|
|============================================= | 64%
|
|============================================= | 65%
|
|============================================== | 66%
|
|=============================================== | 68%
|
|================================================ | 69%
|
|================================================= | 70%
|
|================================================== | 71%
|
|=================================================== | 73%
|
|==================================================== | 74%
|
|===================================================== | 75%
|
|====================================================== | 77%
|
|======================================================= | 78%
|
|======================================================= | 79%
|
|======================================================== | 81%
|
|========================================================= | 82%
|
|========================================================== | 83%
|
|=========================================================== | 84%
|
|============================================================ | 86%
|
|============================================================= | 87%
|
|============================================================== | 88%
|
|=============================================================== | 90%
|
|================================================================ | 91%
|
|================================================================= | 92%
|
|================================================================= | 94%
|
|================================================================== | 95%
|
|=================================================================== | 96%
|
|==================================================================== | 97%
|
|===================================================================== | 99%
|
|======================================================================| 100%
##
##
## Fitting Principal Curve:
##
## Initial curve: d.sq: 341.848
## Iteration 1: d.sq: 221.398
## Iteration 2: d.sq: 201.251
## Iteration 3: d.sq: 196.651
## Iteration 4: d.sq: 195.267
## Iteration 5: d.sq: 195.251
##
## PC Converged in 5 iterations.
inverts_pc
##
## Principal Curve Fitting
##
## Call: prcurve(X = inverts_l10, method = "ca", smoother = smoothGAM,
## vary = TRUE, trace = TRUE, penalty = 1.4)
##
## Algorithm converged after 5 iterations
##
## SumSq Proportion
## Total 342 1.00
## Explained 147 0.43
## Residual 195 0.57
##
## Fitted curve uses 563.023 degrees of freedom.
plot.prcurve.gg(inverts_pc) +
ggtitle("Final solution for principle curve")
## Warning: ggrepel: 2 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps
Principal response curve for the freshwater invertebrate dataset.
We can see that the curve captures 43% of the variance but (say compared to a PCA) at the cost of a much more complex model. It is also possible to look at individual taxa response along the gradient identified in the principle curve. The command in analogue is sppResponse. Here, we have plotted the subset of taxa occurring at at least 17 (60%) of survey sites. Visually, we can see some taxa sorting along the gradient identified in the PC; some taxa respond curvilinearly (e.g., Chironomidae) but others show a threshold-type effect (e.g., Amphipods).
# taxa responses
invert_responses <- sppResponse(inverts_pc)
# subset common taxa (> 60% sites)
focal_spp <- chooseTaxa(inverts_l10, n.occ = 17, value = FALSE)
# plot
plot.sppResponse.gg(invert_responses, focal_spp = focal_spp)
Principal response curve taxa responses for the freshwater invertebrate dataset.
We will demonstrate two methods of non-linear mapping: tSNE and UMAP. As discussed in the main text these are different from the other methods we’ve discussed in the way they seek to balance local and global structure. This also means that we need to be careful in interpreting the size of clusters and so forth. For tSNE and UMAP we will use the Rtsne and umap packages, respectively.
For tSNE the key hyperparameter is perplexity, which controls the balance between local and global structure (by, in essence, determining each point’s neighbourhood). The Rtsne help recommends that \(3 * perpexity < nrow - 1\) (the default is 30, but that is too high for the Lee et al. invertebrate data).
# Try a couple of perplexity values, low theta for more accurate but slower
invert_tsne_p3 <- Rtsne(inverts, perplexity = 3, theta = 0.0)
invert_tsne_p6 <- Rtsne(inverts, perplexity = 6, theta = 0.0)
# Can suppl a distance matrix if we wish non-Euclidean distances between sites
invert_dist <- vegdist(inverts, method = "bray")
invert_tsne_bc_p3 <- Rtsne(invert_dist, perplexity = 3, is_distance = TRUE)
invert_tsne_bc_p6 <- Rtsne(invert_dist, perplexity = 6, is_distance = TRUE)
And plot them; clearly, and perhaps unsurprisingly, the choice of distance metric is very important. Likewise, the perplexity value influences the way the sites are arranged, especially for the Bray-Curtis metric.
tsne3_gg <- plot.nonlin.gg(invert_tsne_p3, site_labels = invert_sites, labels = TRUE)
tsne6_gg <- plot.nonlin.gg(invert_tsne_p6, site_labels = invert_sites, labels = TRUE)
tsned3_gg <- plot.nonlin.gg(invert_tsne_bc_p3, site_labels = invert_sites, labels = TRUE)
tsned6_gg <- plot.nonlin.gg(invert_tsne_bc_p6, site_labels = invert_sites, labels = TRUE)
tsne3_gg + tsne6_gg
tSNE for the Lee et al. data with different perplecities and using distance matrices.
tsned3_gg + tsned6_gg
tSNE for the Lee et al. data with different perplecities and using distance matrices.
# plot.tsne.gg <- function(tsn, sites = rownames(tsn), labels_txt = TRUE, clusters = NULL)
We can now apply the tSNE to the much larger Aotea vegetation data; again, assessing two different perplexities and with/without the Bray-Curtis measure.
# Rtsne will not work if there are duplicated rows (i.e. identical sites)
# so we need to remove them
undup <- !duplicated(aotea_pa)
aotea_pa_nodup <- aotea_pa[undup,]
aotea_sites_nd <- aotea_site$code[undup]
# Try a couple of perplexity values, low theta for more accurate but slower
aotea_tsne_p25 <- Rtsne(aotea_pa_nodup, perplexity = 25, theta = 0.0)
aotea_tsne_p50 <- Rtsne(aotea_pa_nodup, perplexity = 50, theta = 0.0)
# Can suppl a distance matrix if we wish non-Euclidean distances between sites
aotea_dist <- vegdist(aotea_pa_nodup, method = "bray")
aotea_tsne_bc_p25 <- Rtsne(aotea_dist, perplexity = 25, theta = 0.0, is_distance = TRUE)
aotea_tsne_bc_p50 <- Rtsne(aotea_dist, perplexity = 50, theta = 0.0, is_distance = TRUE)
And plot them. Here the differences between the two distance metrics are less clear. The algorithm tends to cluster the geographic locations together and order them in a successional sequence (from ‘AW’ to ‘Te’) while showing also that they form a continuum based on their composition.
lb <- 1:nrow(aotea_pa_nodup)
tsne_ao_p25_gg <- plot.nonlin.gg(aotea_tsne_p25, clusters = aotea_sites_nd)
tsne_ao_p50_gg <- plot.nonlin.gg(aotea_tsne_p50, clusters = aotea_sites_nd)
tsne_ao_d_p25_gg <- plot.nonlin.gg(aotea_tsne_bc_p25, clusters = aotea_sites_nd)
tsne_ao_d_p50_gg <- plot.nonlin.gg(aotea_tsne_bc_p50, clusters = aotea_sites_nd)
(tsne_ao_p25_gg + tsne_ao_p50_gg + tsne_ao_d_p25_gg + tsne_ao_d_p50_gg) +
plot_layout(ncol = 1, guides = "collect") &
theme(legend.position = "bottom")
tSNE for the Perry et al. data with different perplexities and using distance matrices; colours represent geographic locations.
We can now look at the UMAP algorithm (just for the Aotea data for space). The implementation in the umap library uses a special object class for the many possible configuration options, so we can tune the hyper-parameters in detail (including some distance metrics, there is more control of this available in the Python implementation).
set.seed(8736317)
# check the default options
umap.defaults
## umap configuration parameters
## n_neighbors: 15
## n_components: 2
## metric: euclidean
## n_epochs: 200
## input: data
## init: spectral
## min_dist: 0.1
## set_op_mix_ratio: 1
## local_connectivity: 1
## bandwidth: 1
## alpha: 1
## gamma: 1
## negative_sample_rate: 5
## a: NA
## b: NA
## spread: 1
## random_state: NA
## transform_state: NA
## knn: NA
## knn_repeats: 1
## verbose: FALSE
## umap_learn_args: NA
# Run a umap using the default settings
aotea_umap <- umap(aotea_pa)
# What if we change the number of neighbours evaluated
umap_hparam <- umap.defaults
umap_hparam$n_neighbors <- 30
aotea_umap_nn30 <- umap(aotea_pa, config = umap_hparam)
# Or the minimum distance between points (the packing of similar locations)
umap_hparam <- umap.defaults
umap_hparam$n_neighbors <- 15
umap_hparam$min_dist <- 0.05
aotea_umap_md05 <- umap(aotea_pa, config = umap_hparam)
The final configuration is reasonably robust to the hyper-parameter values (location clustering is similar and the successional sequence evident).
aotea_umap_gg <- plot.nonlin.gg(aotea_umap, labels = FALSE, clusters = aotea_site$code)
aotea_umap_nn30_gg <- plot.nonlin.gg(aotea_umap_nn30, labels = FALSE, clusters = aotea_site$code)
aotea_umap_md05_gg <- plot.nonlin.gg(aotea_umap_md05, labels = FALSE, clusters = aotea_site$code)
(aotea_umap_gg + aotea_umap_nn30_gg + aotea_umap_md05_gg) +
plot_layout(guides = "collect") &
theme(legend.position = "bottom")
UMAP for the Perry et al. data with different hyper-parameter values; colours represent geographic locations.
Now, we can move onto some methods of constrained ordination. First, a look at the ‘tikus’ dataset (measures of coral reef communities over time) to compare an unconstrained and a constrained (by time) ordination (data available in the mvabund package in R). Here, a distance-based redundancy analysis (vegan::dbrda) with an nMDS is presented. We will log-transform the data. This example is presented in detail in ver Braak and Šmilauer (-ter Braak & Šmilauer (2015)). Note that for the metaMDS we have increased the number of runs (try) and iterations (maxit) to help ensure convergence on an optimal solution.
# load data - this as per the mvabund package
# https://github.com/aliceyiwang/mvabund/blob/master/data/tikus.RData
load("Data/tikus.RData")
# log transform
tikus_log <- log(tikus$abund + 1)
tikus_survey <- tikus$x$time
# Unconstrained ordination of the Tikus coral reef data
# nMDS - increased try and maxit for convergence
tikus_nmds <- metaMDS(tikus_log, autotransform = FALSE,
wascores = FALSE,
try = 100, maxit = 500, trace = 1)
# plot
tik_mds_gg <- ggplot(data = as.data.frame(scores(tikus_nmds))) +
geom_point(aes(NMDS1, NMDS2, col = as.factor(tikus$x$time)), size = 3) +
scale_color_brewer(name = "Year", palette = "PuRd") +
coord_equal() +
theme_minimal()
# Constrained (by time) ordination of the Tikus coral reef data
# Note use of formula notation
tikus_rd <- dbrda(tikus_log ~ tikus_survey, distance = "bray")
tikus_rd_scores <- data.frame(scores(tikus_rd)$sites)
#plot
tik_rd_gg <- ggplot(data = tikus_rd_scores) +
geom_point(aes(dbRDA1, dbRDA2, col = as.factor(tikus_survey)), size = 3) +
scale_color_brewer(name = "Year", palette = "PuRd") +
coord_equal() +
theme_minimal()
fig6 <- (tik_mds_gg + tik_rd_gg) +
plot_annotation(tag_levels = "a") +
plot_layout(guides = "collect", widths = c(1, 1)) &
theme(legend.position = "bottom")
Uncontrained ordination of coral reef communities (a), and constrained (by time) ordination (b)
A basic form of constrained analysis is redundancy analysis (RDA). We can do this using the command vegan::rda; note that if we do an RDA without any environmental variables we will have a PCA. Usually we will have two matrices; one describing the community at each site and the other the environmental variables (which constrain the ordination). The RDA in vegan allows use of a third matrix that can be partialled out (the ‘conditioning matrix’). We will do an RDA on a Hellinger transformed version of the Lee et al. data (transformed following the advice of Legendre & Gallagher (2001)).
# RDA on subset of env data
invert_rda <- rda(inverts_hel ~ ., data = chem_sub_hyp)
# adjusted r2 (via the vegan package)
invert_rda_r2 <- round(RsquareAdj(invert_rda)$adj.r.squared, 3)
# Plot using autoplot in ggvegan (as an example)
rda_iv_gg <- autoplot(invert_rda, layers = c("biplot", "sites"), geom = "text") +
theme_minimal() +
theme(legend.position = "none")
Constrained redundancy analysis (RDA) for freshwater invertebrate and physiochemical data.
Now a canonical correspondence analysis (CCA) using the vegan::cca command. We’ll build two models on log_10_ transformed abundance data, one saturated and one using a subset of environmental predictors.
invert_cca_all <- cca(inverts_l10, chem_sub)
invert_cca_red <- cca(inverts_l10, chem_sub_hyp)
We can plot the saturated and reduced model and compare them.
# Plot them - autoplot here is from ggvegan
invert_cca_all_gg <- autoplot(invert_cca_all, layers = c("biplot","sites"),
geom = "text") +
theme_bw() +
theme(legend.position = "none")
invert_cca_red_gg <- autoplot(invert_cca_red, layers = c("biplot","sites"),
geom = "text") +
theme_bw() +
theme_bw() +
theme(legend.position = "none")
(invert_cca_all_gg + invert_cca_red_gg) +
plot_annotation(tag_levels = "a")
canonical correspondence analysis of freshwater invertebrate and environemntal data using all (a) and a subset (b) of environemtnal predictors.
dbRDA is similar to RDA but is designed to support a broader range of dissimilarity metrics (including some of those frequently used by community ecologists). Note that in vegan there are two versions, capscale (following Legendre and Anderson (1999)) and dbrda (following McArdle & Anderson (2001)). These differ in some implementation details (read the help!). Here we will use capscale to look at the Lee et al. invertebrate data.
invert_dbrda <- capscale(inverts ~ ., chem_sub_hyp, dist="bray")
dbrda_invert_gg <- autoplot(invert_dbrda, layers = c('sites', 'biplot')) +
theme_bw() +
theme(legend.position = 'none')
dbrda_invert_gg
dbRDA of freshwater invertebrate and enviromental data
For constrained ordinations it is possible to use various selection methods to assess the importance of individual variables and the significance of the axes. This can be done using forward, backward or stepwise model selection based on R2 or AIC; each of these come with caveats., For example, the vegan help for ordistep pithily notes, “… constrained ordination methods do not have AIC, and therefore the step may not be trusted.”. It is important to understand the various options available when conducting these (and any) analysis.
full_mod_dbrda <- capscale(inverts_hel ~ ., data = chem_sub_hyp, dist = "bray") # full model
null_mod_dbrda <- capscale(inverts_hel ~ 1, data = chem_sub_hyp, dist = "bray") # intercept-only model
sel_mod_dbrda <- ordistep(null_mod_dbrda,
scope = formula(full_mod_dbrda),
direction = "both",
permutations = how(nperm = 499), trace = FALSE )
anova(sel_mod_dbrda)
full_mod_dbrda_r2 <- round(RsquareAdj(full_mod_dbrda)$adj.r.squared, 3)
sel_mod_dbrda_r2 <- round(RsquareAdj(sel_mod_dbrda)$adj.r.squared, 3)
dbrda_full_gg <- autoplot(full_mod_dbrda, layers = c("sites", "biplot"), geom = "text") +
theme_minimal() +
theme(legend.position = 'none')
dbrda_sel_gg <- autoplot(sel_mod_dbrda, layers = c("sites", "biplot"), geom = "text") +
theme_minimal() +
theme(legend.position = 'none')
dbrda_full_gg + dbrda_sel_gg +
plot_layout(guides = "collect")
dbRDA including all hypothesised predictors, Adjusted R2 = 0.257 (a)and a subset based on model selection, Adjusted R2 = 0.247 (b)
In a similar vein, we can look at the significance of each variable or axis (again, using vegan::anova.cca). This command uses a permutation-based approach (see Legendre et al. (2011)). As an example, we will use RDA on the Hellinger-transformed invertebrate data.
full_mod <- rda(inverts_hel ~ ., chem_sub_hyp) # Model with all explanatory variables
mod_aov <- anova(full_mod) # This is a test for the overall model
term_aov <- anova(full_mod, by = "term") # can also do marginal tests - see vegan help
axis_aov <- anova(full_mod, by = "axis")
mod_aov # Model is significant
term_aov # look for significant terms
axis_aov # and axes
To demonstrate the method, we will build resampled NMDS for the Lee et al. invertebrate data and Perry et al. vegetation data based on 99 n resamplings using 80% of the total data. The bootstrapping allows us to look at the confidence we might have in, for example, differences between individuals or groups of sites due to sampling variation.
# Define number of simulations to do - slow for large data but could parallelise!
# We'll use fitNDMS::resamp_ndms to do this. BS is the bootstrap size and
# B the no., of sims. Underneath it is using vegan::metaMDS
nsims_iv <- 199
nsims_aotea <- 5 # need more for analysis but this is **slow**!
# Because this is slow we'll cache the op in a directory called 'rds'
if (file.exists("rds/invert_resamp.rds")) {
b_iv <- readRDS("rds/invert_resamp.rds") } else {
set.seed(1767283) # set seed for reproducibility
b_iv <- resamp_nmds(inverts, BS = nrow(inverts) * 0.8, B = nsims_iv, k = 2)
saveRDS(b_iv, "rds/invert_resamp.rds")
}
# as per above need to check convergence...
if (file.exists("rds/aotea_resamp.rds")) {
b_aotea <- readRDS("rds/aotea_resamp.rds") } else {
set.seed(4337659)
b_aotea <- resamp_nmds(aotea_pa, BS = nrow(aotea_pa) * 0.8, B = nsims_aotea, k = 2)
saveRDS(b_aotea, "rds/aotea_resamp.rds")
}
# Code here will trigger some warnings re expected pieces and coercion - can ignore
# Note that the number of times a site is sampled will not necessarily equal n_sims
# It is c. 0.8 x sims
b_iv_dfr <- data.frame(b_iv$points) %>%
rownames_to_column(var = 'site') %>%
arrange(site) %>%
separate(site, into = c('site', 'rep'), sep = "\\.") %>%
mutate(rep = as.numeric(rep)) %>%
mutate(rep = if_else(is.na(rep), 1 , as.numeric(rep) + 1))
## Warning: Expected 2 pieces. Missing pieces filled with `NA` in 29 rows [1, 162,
## 339, 483, 638, 792, 949, 1130, 1277, 1427, 1586, 1747, 1905, 2060, 2221, 2376,
## 2514, 2670, 2835, 3017, ...].
# TO DO - this might need checking
b_aotea_dfr <- data.frame(b_aotea$points) %>%
rownames_to_column(var = 'pcq.site') %>%
arrange(pcq.site) %>%
separate(pcq.site, into = c('pcq_point', 'rep'), sep = "\\.") %>%
mutate(site = gsub("_.*","", pcq_point)) %>%
mutate(rep = if_else(is.na(rep), 1 , as.numeric(rep) + 1))
## Warning: Expected 2 pieces. Additional pieces discarded in 544 rows [539, 540,
## 545, 546, 547, 548, 549, 550, 551, 553, 556, 557, 558, 560, 562, 563, 564, 565,
## 566, 567, ...].
## Warning: Expected 2 pieces. Missing pieces filled with `NA` in 692 rows [1, 4,
## 7, 12, 17, 18, 22, 25, 31, 35, 41, 46, 50, 55, 59, 61, 68, 72, 78, 80, ...].
## Warning in replace_with(out, !condition, false, "`false`", "length of
## `condition`"): NAs introduced by coercion
resamp_iv_gg <- plot.resamp(b_iv_dfr)
resamp_aotea_gg <- plot.resamp(b_aotea_dfr, legend = TRUE)
resamp_iv_stress_gg <- ggplot(data = data.frame(s = b_iv$stresses)) +
geom_histogram(aes(s), bins = 20) + labs(x = 'Stress', y = 'Frequency') +
theme_minimal()
resamp_aotea_stress_gg <- ggplot(data = data.frame(s = b_aotea$stresses)) +
geom_histogram(aes(s), bins = 10) +
labs(x = 'Stress', y = 'Frequency') +
theme_minimal()
(resamp_iv_gg + resamp_iv_stress_gg) / (resamp_aotea_gg + resamp_aotea_stress_gg) +
plot_annotation(tag_level = "a") +
plot_layout(guides = "collect") &
theme(legend.position = 'bottom')
Neither the invertebrate nor vegetation data we have used are suitable for trajectory analysis. Thus, we will use some simple simulated data (via the coenoflex library) to illustrate its use. To conduct trajectory analysis in R you can use the ecotraj library - it is support by an excellent vignette and some sample data.
set.seed (5963277)
# make the community - see the coenoflex help for full details
# the veg element of this is the site-species matrix
traj_comm <- coenoflex(numgrd = 1,
numplt = 20,
numsp = 80,
grdtyp = "e",
grdlen = 100,
width = 30,
variab = 50,
grdprd = 0,
alphad = 0.7,
pdist = "G",
sdist = "r",
skew = 3.0,
aacorr = 0,
cmpasy = 1.5,
maxtot = 100,
noise = 35,
slack = 0.3,
autlin = "irm(1)")
sim_grad_gg <- plot.coeno.gg(traj_comm)$grad_plot
sim_rich_gg <- plot.coeno.rich.gg(traj_comm)$rich_plot
sim_grad_gg + sim_rich_gg
## Warning: Removed 80 row(s) containing missing values (geom_path).
## `geom_smooth()` using formula 'y ~ s(x, bs = "cs")'
We now have one simulated community so we can look at its trajectory through ordination space (here an nMDS).
# Description of sites and surveys
sites <- rep(1, 20)
surveys <- 1:20
# Locations in ordination space for the data
traj_mds <- metaMDS(traj_comm$veg, distance = "bray", autotransform = FALSE)
## Run 0 stress 0.06760424
## Run 1 stress 0.0687256
## Run 2 stress 0.06829441
## Run 3 stress 0.06690915
## ... New best solution
## ... Procrustes: rmse 0.0528988 max resid 0.1084081
## Run 4 stress 0.06707586
## ... Procrustes: rmse 0.0378114 max resid 0.1082975
## Run 5 stress 0.06872559
## Run 6 stress 0.06832658
## Run 7 stress 0.0661297
## ... New best solution
## ... Procrustes: rmse 0.01852594 max resid 0.0517766
## Run 8 stress 0.06690913
## Run 9 stress 0.06707602
## Run 10 stress 0.06707587
## Run 11 stress 0.06872938
## Run 12 stress 0.06707602
## Run 13 stress 0.06707606
## Run 14 stress 0.06707599
## Run 15 stress 0.06612966
## ... New best solution
## ... Procrustes: rmse 3.104884e-05 max resid 5.631552e-05
## ... Similar to previous best
## Run 16 stress 0.06612947
## ... New best solution
## ... Procrustes: rmse 0.0002375855 max resid 0.0004328182
## ... Similar to previous best
## Run 17 stress 0.06707624
## Run 18 stress 0.06612948
## ... Procrustes: rmse 1.319471e-05 max resid 2.896429e-05
## ... Similar to previous best
## Run 19 stress 0.06612985
## ... Procrustes: rmse 0.0003502671 max resid 0.0006367793
## ... Similar to previous best
## Run 20 stress 0.06612948
## ... Procrustes: rmse 2.154406e-05 max resid 5.346227e-05
## ... Similar to previous best
## *** Solution reached
traj_xy <- scores(traj_mds)
# Plot the trajectory
plot.mds.gg(traj_mds, labels = TRUE, txt.x = -2.5, txt.y = 1)
trajectoryPlot(traj_xy, sites, surveys, lwd = 2)
Often when conducting an unconstrained ordination we have information about a priori groups or explanatory variables for each site. We emphasise the a priori as there is a risk of circularity in using ordination to identify groups and then assessing whether they differ from each other. There are two questions we might ask: (i) do these groups differ in their location, and (ii) do the groups differ in their scatter (‘dispersion’). The first question would allow us to assess whether there have been shifts in community structure under different management regimes, for example; the second, is relevant where there may not be a change but a site is winnowed out to a subset of some other reference community (e.g. under invasion). Most of these methods are permutation-based (meaning we can use parallelisation to speed thigns uo) and operate on the dissimilarity matrix rather than an ordination. We’ll use the Aotea data to demonstrate these methods as we have groups (geographic locations).
# reduce if you want to speed up!
n_perm <- 499
n_cores <- parallel::detectCores() - 1 # cores for parallelisation (set to 1 for none)
aotea_dist <- vegdist(aotea_pa, method = "bray")
# ANOSIM on 'site
aotea_anosim <-anosim(aotea_dist, aotea_site$code, permutations = n_perm, parallel = n_cores)
# PERMANOVA on 'site'
aotea_adonis <- adonis2(aotea_dist ~ aotea_site$code, permutations = n_perm, parallel = n_cores)
# You can't do pairwise comparisons in adonis2 but the pairwiseAdonis package allows this
aotea_pa_pw <- aotea_pa
aotea_pa_pw$site <- aotea_site$code
aotea_pw_adonis <- pairwise.adonis2(aotea_pa[,-274] ~ site, data = aotea_pa_pw, permutations = n_perm, parallel = n_cores)
# Test for homogeneity of variance (scatter)
aotea_mod <- betadisper(aotea_dist, aotea_site$code)
aotea_mod_perm <- permutest(aotea_mod, pairwise = TRUE, permutations = n_perm)
aotea_mod_HSD <- TukeyHSD(aotea_mod)
The plots below show the observed (red line) test statistic and the distribution of the simulated values – in both cases suggesting that there is a strong location effect of site (in this case due to successional differences). We can drill into this further by looking at the aotea_pw_adonis object.
# permuted stats
aotea_perm_dfr <- data.frame(perm = c(permustats(aotea_anosim)$permutations,
permustats(aotea_adonis)$permutations),
test = rep(c('ANOSIM', 'PERMANOVA'), each = 499))
# observed stat (for comparison)
aotea_stat_dfr <- data.frame(stat = c(permustats(aotea_anosim)$statistic,
permustats(aotea_adonis)$statistc),
test = c('ANOSIM', 'PERMANOVA'))
## Warning in data.frame(stat = c(permustats(aotea_anosim)$statistic,
## permustats(aotea_adonis)$statistc), : row names were found from a short variable
## and have been discarded
aotea_sim_gg <- ggplot(aotea_perm_dfr) +
geom_histogram(aes(x = perm)) +
geom_vline(data = aotea_stat_dfr, aes(xintercept = stat), col = "red") +
facet_wrap(~test, scales = "free_x") +
labs (x = 'Permutation statistics', y = 'Frequency') +
theme_minimal()
print(aotea_sim_gg)
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
aotea_adonis_td <- tidy(aotea_adonis)
## Warning in tidy.anova(aotea_adonis): The following column names in ANOVA output
## were not recognized or transformed: SumOfSqs, R2
disp_rc_plot <- plot(aotea_mod, main = "")
As discussed in the main text, ecological community data can have strong mean-variance relationships (Warton et al., 2015). Model-based approaches to multivariate analysis are a direct response to this issue. One way to address this problem is a transformation of the data to down-weight abundant species (e.g. log-transform). Clarke et al (2006) describe a method that individually weights species based on their departure from a Poisson (mnean = variance) distribution.
inverts_dispw <- dispweight(inverts) # vegan::dispweight implements the method
iv_var <- apply(inverts, 2, var)
iv_mean <- apply(inverts, 2, mean)
iv_sig <- as.factor(ifelse(attr(inverts_dispw, "p") < 0.01, 1, 2))
iv_dfr <- data.frame(m = iv_mean, v = iv_var, s = iv_sig)
mv_raw_iv <- ggplot(iv_dfr) +
geom_point(aes(x = m , y = v, size = m /v, col = s)) +
geom_abline(slope = 1, intercept = 0) +
scale_x_log10() +
scale_y_log10() +
labs(x = "Mean abundance", y = "Var. abundance") +
theme_bw()
invert_wgt_mds <- metaMDS(inverts_dispw, distance = 'bray', autotransform = FALSE, trace = 1)
## Run 0 stress 0.2309683
## Run 1 stress 0.2382234
## Run 2 stress 0.2841807
## Run 3 stress 0.2517004
## Run 4 stress 0.2321835
## Run 5 stress 0.2770384
## Run 6 stress 0.2343746
## Run 7 stress 0.2411745
## Run 8 stress 0.2810802
## Run 9 stress 0.2764593
## Run 10 stress 0.2321836
## Run 11 stress 0.2624391
## Run 12 stress 0.2385838
## Run 13 stress 0.2330111
## Run 14 stress 0.230266
## ... New best solution
## ... Procrustes: rmse 0.01669144 max resid 0.06824641
## Run 15 stress 0.2563848
## Run 16 stress 0.2725067
## Run 17 stress 0.2390053
## Run 18 stress 0.2633822
## Run 19 stress 0.2409138
## Run 20 stress 0.2411743
## *** No convergence -- monoMDS stopping criteria:
## 2: no. of iterations >= maxit
## 18: stress ratio > sratmax
iv_mds_wgt_gg <- plot.mds.gg(invert_wgt_mds, txt.x = -0.9, txt.y = -0.8, labels = TRUE)
(mv_raw_iv)
(iv_mds_gg + iv_mds_wgt_gg)
Finally, we might want to identify the species (or variable) that characterise specific groups. One commonly employed method to so this is SIMPER (for Brya-Curtis data). This method, however, tends to conflate changes in species mean and variance across groups and so may pick out the most variable (see Warton et al. (2012)). We’ll try it on the Aotea vegetation data to look at what species might contribute to differences between geographic locations.
simper_veg <- simper(aotea_pa, group = aotea_site$code)
# simper_veg
The function provides a large amount of information that we need to sift through (not printed out here), but we can see which species contribute most to differences between groups. We need to use permutation to assess these differences more robustly.
if (file.exists("rds/aotea_simper_perm.rds")) {
aotea_mds <- readRDS("rds/aotea_simper_perm.rds") } else {
n_cores <- parallel::detectCores() - 1
n_perm <-19
simper_perm_veg <- simper(aotea_pa, group = aotea_site$code, permutations = n_perm, parallel = n_cores, trace = FALSE)
saveRDS(aotea_mds, "rds/aotea_simper_perm.rds")
}
# summary(simper_perm_veg)
A different, and potentially more robust, approach is to use the indicator analysis descrined by de Caceres and colleagues (2009; 2010). This method seeks to identify ‘faithful’ species - in other words, those species that if present enable you to confidently determine the vegetation type (or types). The indicspecies package implements this approach, building on the work of Dufrêne and Legendre (1997).
n_perm <- 99
aotea_indic <- multipatt(aotea_pa, cluster = aotea_site$code, control = how(nperm = n_perm))
summary(aotea_indic, indvalcom = TRUE, alpha = 0.01) # summarise, showing A and B for species with p <= .01
##
## Multilevel pattern analysis
## ---------------------------
##
## Association function: IndVal.g
## Significance level (alpha): 0.01
##
## Total number of species: 273
## Selected number of species: 139
## Number of species associated to 1 group: 64
## Number of species associated to 2 groups: 27
## Number of species associated to 3 groups: 13
## Number of species associated to 4 groups: 13
## Number of species associated to 5 groups: 16
## Number of species associated to 6 groups: 5
## Number of species associated to 7 groups: 1
##
## List of species associated to each combination:
##
## Group AB #sps. 6
## A B stat p.value
## Rubcis 0.55760 0.61765 0.587 0.01 **
## Pinpin 0.91924 0.26471 0.493 0.01 **
## Hellan 0.72072 0.17647 0.357 0.01 **
## Droaur 1.00000 0.08824 0.297 0.01 **
## Polmyr 1.00000 0.05882 0.243 0.01 **
## Lasgla 0.63619 0.08824 0.237 0.01 **
##
## Group AW #sps. 21
## A B stat p.value
## Schten 0.9201 0.6250 0.758 0.01 **
## Moraff 0.7847 0.6375 0.707 0.01 **
## Gonagg 0.9798 0.3750 0.606 0.01 **
## Erilus 0.7372 0.4000 0.543 0.01 **
## Eribac 1.0000 0.2875 0.536 0.01 **
## Pomamo 0.7924 0.2750 0.467 0.01 **
## Gahlac 0.6067 0.2750 0.408 0.01 **
## Linlin 1.0000 0.1375 0.371 0.01 **
## PinusSpp 1.0000 0.1375 0.371 0.01 **
## Baujun 1.0000 0.1000 0.316 0.01 **
## Cyafra 1.0000 0.0875 0.296 0.01 **
## Hyprad 1.0000 0.0875 0.296 0.01 **
## Schmas 1.0000 0.0875 0.296 0.01 **
## Hebstr 0.7633 0.1000 0.276 0.01 **
## Bauten 1.0000 0.0750 0.274 0.01 **
## Hakgib 0.8001 0.0875 0.265 0.01 **
## Paesca 0.9063 0.0750 0.261 0.01 **
## Cenuni 0.7565 0.0750 0.238 0.01 **
## GhnaphaliumSpp 1.0000 0.0500 0.224 0.01 **
## Glemic 1.0000 0.0375 0.194 0.01 **
## Pimare 1.0000 0.0375 0.194 0.01 **
##
## Group GF #sps. 2
## A B stat p.value
## Nescun 1.00000 0.06977 0.264 0.01 **
## Nercil 1.00000 0.04651 0.216 0.01 **
##
## Group HA #sps. 1
## A B stat p.value
## Corsel 1.00000 0.08743 0.296 0.01 **
##
## Group LW #sps. 15
## A B stat p.value
## RubusSpp 0.83736 0.19380 0.403 0.01 **
## Ariser 1.00000 0.13953 0.374 0.01 **
## Cyafas 1.00000 0.11628 0.341 0.01 **
## Strhet 1.00000 0.10853 0.329 0.01 **
## Melmic 0.66690 0.14729 0.313 0.01 **
## Carser 0.70862 0.09302 0.257 0.01 **
## CordylineSpp 1.00000 0.06202 0.249 0.01 **
## Dicrep 1.00000 0.06202 0.249 0.01 **
## Pepurv 1.00000 0.05426 0.233 0.01 **
## Melter 0.74553 0.06977 0.228 0.01 **
## Clepan 1.00000 0.04651 0.216 0.01 **
## Alsmac 1.00000 0.04651 0.216 0.01 **
## Uncunc 0.63662 0.06977 0.211 0.01 **
## BlechSpp 0.85714 0.04651 0.200 0.01 **
## Pelrot 1.00000 0.03876 0.197 0.01 **
##
## Group NT #sps. 5
## A B stat p.value
## Myrsal 0.61680 0.50000 0.555 0.01 **
## Copgra 0.47294 0.55556 0.513 0.01 **
## Corban 0.73103 0.20370 0.386 0.01 **
## GahSpp 1.00000 0.14815 0.385 0.01 **
## Lycvol 1.00000 0.07407 0.272 0.01 **
##
## Group PT #sps. 12
## A B stat p.value
## Lycdeu 0.96712 0.36765 0.596 0.01 **
## AlseuoSpp 0.49243 0.60294 0.545 0.01 **
## Psedis 0.72762 0.33824 0.496 0.01 **
## Tortor 0.88356 0.11765 0.322 0.01 **
## Halkir 1.00000 0.10294 0.321 0.01 **
## Copluc 0.65220 0.14706 0.310 0.01 **
## Ichpyg 1.00000 0.08824 0.297 0.01 **
## Schfis 1.00000 0.08824 0.297 0.01 **
## Lyccer 0.84753 0.10294 0.295 0.01 **
## Nesmon 0.78628 0.08824 0.263 0.01 **
## NesSpp 1.00000 0.04412 0.210 0.01 **
## Pithut 1.00000 0.02941 0.171 0.01 **
##
## Group Te #sps. 2
## A B stat p.value
## Aspbul 0.8435 0.3486 0.542 0.01 **
## Micava 0.9253 0.2294 0.461 0.01 **
##
## Group AB+AW #sps. 3
## A B stat p.value
## Hakser 0.7884 0.5175 0.639 0.01 **
## Pteesc 0.7639 0.5088 0.623 0.01 **
## Leplat 0.6987 0.4912 0.586 0.01 **
##
## Group AB+NT #sps. 1
## A B stat p.value
## Blenov 0.6103 0.3523 0.464 0.01 **
##
## Group AB+PT #sps. 1
## A B stat p.value
## Dianig 0.5651 0.2745 0.394 0.01 **
##
## Group AB+Te #sps. 2
## A B stat p.value
## Adicun 0.68577 0.39683 0.522 0.01 **
## Pnepen 0.66712 0.08333 0.236 0.01 **
##
## Group HA+LW #sps. 3
## A B stat p.value
## Dacdac 0.81860 0.15385 0.355 0.01 **
## CarexSpp 0.65216 0.14103 0.303 0.01 **
## Aleexc 0.97066 0.07692 0.273 0.01 **
##
## Group LW+PT #sps. 1
## A B stat p.value
## Pitten 0.8569 0.1371 0.343 0.01 **
##
## Group LW+Te #sps. 4
## A B stat p.value
## Micsca 0.8453 0.3401 0.536 0.01 **
## Psecra 0.8903 0.2363 0.459 0.01 **
## Pipexc 0.7725 0.1210 0.306 0.01 **
## MeteroSpp 0.8129 0.1037 0.290 0.01 **
##
## Group NT+PT #sps. 9
## A B stat p.value
## Agaaus 0.73260 0.51639 0.615 0.01 **
## Brakir 0.85517 0.40164 0.586 0.01 **
## Neslan 0.69028 0.37705 0.510 0.01 **
## Daccup 0.73244 0.34426 0.502 0.01 **
## Oleran 0.71581 0.20492 0.383 0.01 **
## Prufer 0.63931 0.15574 0.316 0.01 **
## Weisil 0.84146 0.10656 0.299 0.01 **
## Blefra 0.90088 0.09836 0.298 0.01 **
## Gledic 0.95820 0.06557 0.251 0.01 **
##
## Group NT+Te #sps. 1
## A B stat p.value
## Metper 0.6851 0.3897 0.517 0.01 **
##
## Group PT+Te #sps. 2
## A B stat p.value
## CollospSpp 0.8937 0.2517 0.474 0.01 **
## HymenophyllumSpp 0.7468 0.1469 0.331 0.01 **
##
## Group AB+GF+HA #sps. 2
## A B stat p.value
## Dooaus 0.84546 0.31214 0.514 0.01 **
## Uleeur 1.00000 0.08671 0.294 0.01 **
##
## Group AB+HA+PT #sps. 2
## A B stat p.value
## GahniaSpp 0.7847 0.4737 0.610 0.01 **
## SchSpp 0.9311 0.1614 0.388 0.01 **
##
## Group AB+LW+PT #sps. 1
## A B stat p.value
## PterisSpp 0.86023 0.09957 0.293 0.01 **
##
## Group AB+NT+PT #sps. 1
## A B stat p.value
## Phytri 0.9132 0.4615 0.649 0.01 **
##
## Group HA+LW+Te #sps. 1
## A B stat p.value
## Corlae 0.951 0.100 0.308 0.01 **
##
## Group HA+NT+PT #sps. 2
## A B stat p.value
## Hebmac 1.0000 0.2000 0.447 0.01 **
## Podtot 0.7918 0.2393 0.435 0.01 **
##
## Group LW+NT+Te #sps. 2
## A B stat p.value
## Rhosap 0.7684 0.7880 0.778 0.01 **
## Metful 0.7572 0.1347 0.319 0.01 **
##
## Group LW+PT+Te #sps. 1
## A B stat p.value
## Metdif 0.7884 0.1687 0.365 0.01 **
##
## Group NT+PT+Te #sps. 1
## A B stat p.value
## Schdig 0.86831 0.07647 0.258 0.01 **
##
## Group AB+AW+GF+HA #sps. 1
## A B stat p.value
## Lepsco 0.8138 0.5469 0.667 0.01 **
##
## Group AB+AW+HA+PT #sps. 1
## A B stat p.value
## Olefur 0.8895 0.4712 0.647 0.01 **
##
## Group AB+GF+HA+LW #sps. 1
## A B stat p.value
## ClematisSpp 0.9316 0.1200 0.334 0.01 **
##
## Group AB+GF+LW+Te #sps. 1
## A B stat p.value
## UnciniaSpp 0.9364 0.1412 0.364 0.01 **
##
## Group AB+HA+LW+Te #sps. 1
## A B stat p.value
## Coparb 0.8659 0.4255 0.607 0.01 **
##
## Group AB+HA+NT+PT #sps. 1
## A B stat p.value
## Cyajun 0.9040 0.1327 0.346 0.01 **
##
## Group AB+LW+NT+PT #sps. 2
## A B stat p.value
## Copspa 0.8172 0.3509 0.535 0.01 **
## Lygart 0.7562 0.1018 0.277 0.01 **
##
## Group AW+GF+HA+PT #sps. 1
## A B stat p.value
## Metexc 0.90829 0.06522 0.243 0.01 **
##
## Group HA+LW+NT+Te #sps. 1
## A B stat p.value
## Asppol 0.9384 0.1045 0.313 0.01 **
##
## Group LW+NT+PT+Te #sps. 3
## A B stat p.value
## Beitar 0.8409 0.6546 0.742 0.01 **
## Dysspe 0.7903 0.6077 0.693 0.01 **
## Ripsca 0.7846 0.3859 0.550 0.01 **
##
## Group AB+AW+GF+HA+PT #sps. 1
## A B stat p.value
## Leufas 0.9077 0.5162 0.685 0.01 **
##
## Group AB+AW+HA+LW+Te #sps. 1
## A B stat p.value
## Oplhir 0.9041 0.1770 0.4 0.01 **
##
## Group AB+HA+LW+NT+Te #sps. 4
## A B stat p.value
## Melram 0.8698 0.4061 0.594 0.01 **
## Micpus 0.9414 0.2929 0.525 0.01 **
## Micopus 0.9300 0.2929 0.522 0.01 **
## Aspfla 0.8732 0.1521 0.364 0.01 **
##
## Group AB+HA+NT+PT+Te #sps. 1
## A B stat p.value
## AstSpp 0.9285 0.1562 0.381 0.01 **
##
## Group AB+LW+NT+PT+Te #sps. 2
## A B stat p.value
## Psearb 0.8796 0.3559 0.559 0.01 **
## Blefil 0.8304 0.2584 0.463 0.01 **
##
## Group AW+HA+LW+NT+Te #sps. 1
## A B stat p.value
## Coprob 0.93542 0.08886 0.288 0.01 **
##
## Group GF+HA+LW+NT+Te #sps. 1
## A B stat p.value
## Beitaw 0.9394 0.3240 0.552 0.01 **
##
## Group GF+LW+NT+PT+Te #sps. 1
## A B stat p.value
## Kniexc 0.8673 0.3779 0.573 0.01 **
##
## Group HA+LW+NT+PT+Te #sps. 4
## A B stat p.value
## Hohpop 0.97342 0.28988 0.531 0.01 **
## Hedarb 0.92663 0.28221 0.511 0.01 **
## TmesipterisSpp 0.96750 0.09969 0.311 0.01 **
## Pyrele 0.96142 0.08436 0.285 0.01 **
##
## Group AB+AW+GF+HA+LW+NT #sps. 1
## A B stat p.value
## Coprha 0.9025 0.5419 0.699 0.01 **
##
## Group AB+AW+HA+LW+NT+Te #sps. 1
## A B stat p.value
## Brarep 0.9248 0.5645 0.723 0.01 **
##
## Group AB+HA+LW+NT+PT+Te #sps. 2
## A B stat p.value
## Myraus 0.9063 0.6254 0.753 0.01 **
## Freban 1.0000 0.2332 0.483 0.01 **
##
## Group GF+HA+LW+NT+PT+Te #sps. 1
## A B stat p.value
## Vitluc 1.0000 0.1729 0.416 0.01 **
##
## Group AB+AW+GF+HA+LW+NT+PT #sps. 1
## A B stat p.value
## Kunrob 0.9719 0.7031 0.827 0.01 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Again, this function yields a wealth of information. We can see that of the 273 species in the data, c. 140 are deemed to provide association information (85 to 1 group). We can also see two parameters A and B. The first of these is the probability that a site belongs to some group given the presence of the species and B the probability of finding the species given the site type. As the (vignette for indicspecies)[https://cran.r-project.org/web/packages/indicspecies/vignettes/indicspeciesTutorial.pdf] notes, there are potential problems here with the familywise error rate so some correction (or caution) may be required.
If we want to hone in on indicator taxa for specific groups then we can use the indicators function. For example, we could identify indicators (here just pairs) for the community at the Awana (‘AW’) site (the earliest in the succession). We can constrain so that A and B are above some threshold.
awana_indicators <- indicators(aotea_pa, cluster = aotea_site$code, group = "AW", At = 0.7, Bt = 0.3, max.order = 2, verbose = TRUE) # set verbose FALSE to turn off the output
## Target site group: AW
## Number of candidate species: 273
## Number of sites: 895
## Size of the site group: 80
## Starting species 1 ... accepted combinations: 0
## Starting species 2 ... accepted combinations: 0
## Starting species 3 ... accepted combinations: 0
## Starting species 4 ... accepted combinations: 0
## Starting species 5 ... accepted combinations: 0
## Starting species 6 ... accepted combinations: 0
## Starting species 7 ... accepted combinations: 0
## Starting species 8 ... accepted combinations: 0
## Starting species 9 ... accepted combinations: 0
## Starting species 10 ... accepted combinations: 0
## Starting species 11 ... accepted combinations: 0
## Starting species 12 ... accepted combinations: 0
## Starting species 13 ... accepted combinations: 0
## Starting species 14 ... accepted combinations: 0
## Starting species 15 ... accepted combinations: 0
## Starting species 16 ... accepted combinations: 0
## Starting species 17 ... accepted combinations: 0
## Starting species 18 ... accepted combinations: 0
## Starting species 19 ... accepted combinations: 0
## Starting species 20 ... accepted combinations: 0
## Starting species 21 ... accepted combinations: 0
## Starting species 22 ... accepted combinations: 0
## Starting species 23 ... accepted combinations: 0
## Starting species 24 ... accepted combinations: 0
## Starting species 25 ... accepted combinations: 0
## Starting species 26 ... accepted combinations: 0
## Starting species 27 ... accepted combinations: 0
## Starting species 28 ... accepted combinations: 0
## Starting species 29 ... accepted combinations: 0
## Starting species 30 ... accepted combinations: 0
## Starting species 31 ... accepted combinations: 0
## Starting species 32 ... accepted combinations: 0
## Starting species 33 ... accepted combinations: 0
## Starting species 34 ... accepted combinations: 0
## Starting species 35 ... accepted combinations: 0
## Starting species 36 ... accepted combinations: 1
## Starting species 37 ... accepted combinations: 1
## Starting species 38 ... accepted combinations: 1
## Starting species 39 ... accepted combinations: 1
## Starting species 40 ... accepted combinations: 1
## Starting species 41 ... accepted combinations: 1
## Starting species 42 ... accepted combinations: 1
## Starting species 43 ... accepted combinations: 1
## Starting species 44 ... accepted combinations: 1
## Starting species 45 ... accepted combinations: 1
## Starting species 46 ... accepted combinations: 1
## Starting species 47 ... accepted combinations: 1
## Starting species 48 ... accepted combinations: 1
## Starting species 49 ... accepted combinations: 1
## Starting species 50 ... accepted combinations: 1
## Starting species 51 ... accepted combinations: 1
## Starting species 52 ... accepted combinations: 1
## Starting species 53 ... accepted combinations: 1
## Starting species 54 ... accepted combinations: 1
## Starting species 55 ... accepted combinations: 1
## Starting species 56 ... accepted combinations: 1
## Starting species 57 ... accepted combinations: 1
## Starting species 58 ... accepted combinations: 1
## Starting species 59 ... accepted combinations: 1
## Starting species 60 ... accepted combinations: 1
## Starting species 61 ... accepted combinations: 1
## Starting species 62 ... accepted combinations: 1
## Starting species 63 ... accepted combinations: 1
## Starting species 64 ... accepted combinations: 1
## Starting species 65 ... accepted combinations: 4
## Starting species 66 ... accepted combinations: 4
## Starting species 67 ... accepted combinations: 4
## Starting species 68 ... accepted combinations: 4
## Starting species 69 ... accepted combinations: 4
## Starting species 70 ... accepted combinations: 4
## Starting species 71 ... accepted combinations: 4
## Starting species 72 ... accepted combinations: 10
## Starting species 73 ... accepted combinations: 10
## Starting species 74 ... accepted combinations: 10
## Starting species 75 ... accepted combinations: 10
## Starting species 76 ... accepted combinations: 12
## Starting species 77 ... accepted combinations: 12
## Starting species 78 ... accepted combinations: 12
## Starting species 79 ... accepted combinations: 12
## Starting species 80 ... accepted combinations: 12
## Starting species 81 ... accepted combinations: 12
## Starting species 82 ... accepted combinations: 12
## Starting species 83 ... accepted combinations: 12
## Starting species 84 ... accepted combinations: 12
## Starting species 85 ... accepted combinations: 12
## Starting species 86 ... accepted combinations: 12
## Starting species 87 ... accepted combinations: 12
## Starting species 88 ... accepted combinations: 12
## Starting species 89 ... accepted combinations: 12
## Starting species 90 ... accepted combinations: 15
## Starting species 91 ... accepted combinations: 17
## Starting species 92 ... accepted combinations: 18
## Starting species 93 ... accepted combinations: 18
## Starting species 94 ... accepted combinations: 18
## Starting species 95 ... accepted combinations: 18
## Starting species 96 ... accepted combinations: 18
## Starting species 97 ... accepted combinations: 18
## Starting species 98 ... accepted combinations: 18
## Starting species 99 ... accepted combinations: 18
## Starting species 100 ... accepted combinations: 18
## Starting species 101 ... accepted combinations: 18
## Starting species 102 ... accepted combinations: 18
## Starting species 103 ... accepted combinations: 18
## Starting species 104 ... accepted combinations: 18
## Starting species 105 ... accepted combinations: 18
## Starting species 106 ... accepted combinations: 18
## Starting species 107 ... accepted combinations: 18
## Starting species 108 ... accepted combinations: 18
## Starting species 109 ... accepted combinations: 18
## Starting species 110 ... accepted combinations: 18
## Starting species 111 ... accepted combinations: 21
## Starting species 112 ... accepted combinations: 21
## Starting species 113 ... accepted combinations: 21
## Starting species 114 ... accepted combinations: 21
## Starting species 115 ... accepted combinations: 21
## Starting species 116 ... accepted combinations: 21
## Starting species 117 ... accepted combinations: 22
## Starting species 118 ... accepted combinations: 22
## Starting species 119 ... accepted combinations: 22
## Starting species 120 ... accepted combinations: 22
## Starting species 121 ... accepted combinations: 22
## Starting species 122 ... accepted combinations: 22
## Starting species 123 ... accepted combinations: 22
## Starting species 124 ... accepted combinations: 22
## Starting species 125 ... accepted combinations: 22
## Starting species 126 ... accepted combinations: 22
## Starting species 127 ... accepted combinations: 22
## Starting species 128 ... accepted combinations: 22
## Starting species 129 ... accepted combinations: 22
## Starting species 130 ... accepted combinations: 22
## Starting species 131 ... accepted combinations: 22
## Starting species 132 ... accepted combinations: 22
## Starting species 133 ... accepted combinations: 22
## Starting species 134 ... accepted combinations: 22
## Starting species 135 ... accepted combinations: 22
## Starting species 136 ... accepted combinations: 22
## Starting species 137 ... accepted combinations: 22
## Starting species 138 ... accepted combinations: 22
## Starting species 139 ... accepted combinations: 22
## Starting species 140 ... accepted combinations: 22
## Starting species 141 ... accepted combinations: 22
## Starting species 142 ... accepted combinations: 22
## Starting species 143 ... accepted combinations: 22
## Starting species 144 ... accepted combinations: 22
## Starting species 145 ... accepted combinations: 22
## Starting species 146 ... accepted combinations: 22
## Starting species 147 ... accepted combinations: 22
## Starting species 148 ... accepted combinations: 22
## Starting species 149 ... accepted combinations: 22
## Starting species 150 ... accepted combinations: 23
## Starting species 151 ... accepted combinations: 23
## Starting species 152 ... accepted combinations: 23
## Starting species 153 ... accepted combinations: 23
## Starting species 154 ... accepted combinations: 23
## Starting species 155 ... accepted combinations: 23
## Starting species 156 ... accepted combinations: 23
## Starting species 157 ... accepted combinations: 23
## Starting species 158 ... accepted combinations: 23
## Starting species 159 ... accepted combinations: 23
## Starting species 160 ... accepted combinations: 23
## Starting species 161 ... accepted combinations: 25
## Starting species 162 ... accepted combinations: 25
## Starting species 163 ... accepted combinations: 25
## Starting species 164 ... accepted combinations: 25
## Starting species 165 ... accepted combinations: 25
## Starting species 166 ... accepted combinations: 25
## Starting species 167 ... accepted combinations: 25
## Starting species 168 ... accepted combinations: 25
## Starting species 169 ... accepted combinations: 25
## Starting species 170 ... accepted combinations: 25
## Starting species 171 ... accepted combinations: 25
## Starting species 172 ... accepted combinations: 25
## Starting species 173 ... accepted combinations: 25
## Starting species 174 ... accepted combinations: 25
## Starting species 175 ... accepted combinations: 25
## Starting species 176 ... accepted combinations: 25
## Starting species 177 ... accepted combinations: 25
## Starting species 178 ... accepted combinations: 25
## Starting species 179 ... accepted combinations: 25
## Starting species 180 ... accepted combinations: 25
## Starting species 181 ... accepted combinations: 25
## Starting species 182 ... accepted combinations: 25
## Starting species 183 ... accepted combinations: 25
## Starting species 184 ... accepted combinations: 25
## Starting species 185 ... accepted combinations: 25
## Starting species 186 ... accepted combinations: 25
## Starting species 187 ... accepted combinations: 25
## Starting species 188 ... accepted combinations: 25
## Starting species 189 ... accepted combinations: 25
## Starting species 190 ... accepted combinations: 25
## Starting species 191 ... accepted combinations: 25
## Starting species 192 ... accepted combinations: 25
## Starting species 193 ... accepted combinations: 25
## Starting species 194 ... accepted combinations: 25
## Starting species 195 ... accepted combinations: 25
## Starting species 196 ... accepted combinations: 25
## Starting species 197 ... accepted combinations: 25
## Starting species 198 ... accepted combinations: 25
## Starting species 199 ... accepted combinations: 25
## Starting species 200 ... accepted combinations: 25
## Starting species 201 ... accepted combinations: 25
## Starting species 202 ... accepted combinations: 25
## Starting species 203 ... accepted combinations: 25
## Starting species 204 ... accepted combinations: 25
## Starting species 205 ... accepted combinations: 25
## Starting species 206 ... accepted combinations: 25
## Starting species 207 ... accepted combinations: 25
## Starting species 208 ... accepted combinations: 25
## Starting species 209 ... accepted combinations: 25
## Starting species 210 ... accepted combinations: 25
## Starting species 211 ... accepted combinations: 25
## Starting species 212 ... accepted combinations: 25
## Starting species 213 ... accepted combinations: 25
## Starting species 214 ... accepted combinations: 25
## Starting species 215 ... accepted combinations: 25
## Starting species 216 ... accepted combinations: 25
## Starting species 217 ... accepted combinations: 25
## Starting species 218 ... accepted combinations: 25
## Starting species 219 ... accepted combinations: 25
## Starting species 220 ... accepted combinations: 25
## Starting species 221 ... accepted combinations: 25
## Starting species 222 ... accepted combinations: 25
## Starting species 223 ... accepted combinations: 25
## Starting species 224 ... accepted combinations: 25
## Starting species 225 ... accepted combinations: 25
## Starting species 226 ... accepted combinations: 25
## Starting species 227 ... accepted combinations: 25
## Starting species 228 ... accepted combinations: 25
## Starting species 229 ... accepted combinations: 25
## Starting species 230 ... accepted combinations: 25
## Starting species 231 ... accepted combinations: 25
## Starting species 232 ... accepted combinations: 25
## Starting species 233 ... accepted combinations: 25
## Starting species 234 ... accepted combinations: 25
## Starting species 235 ... accepted combinations: 25
## Starting species 236 ... accepted combinations: 25
## Starting species 237 ... accepted combinations: 25
## Starting species 238 ... accepted combinations: 25
## Starting species 239 ... accepted combinations: 25
## Starting species 240 ... accepted combinations: 25
## Starting species 241 ... accepted combinations: 25
## Starting species 242 ... accepted combinations: 25
## Starting species 243 ... accepted combinations: 25
## Starting species 244 ... accepted combinations: 25
## Starting species 245 ... accepted combinations: 25
## Starting species 246 ... accepted combinations: 25
## Starting species 247 ... accepted combinations: 25
## Starting species 248 ... accepted combinations: 25
## Starting species 249 ... accepted combinations: 25
## Starting species 250 ... accepted combinations: 25
## Starting species 251 ... accepted combinations: 25
## Starting species 252 ... accepted combinations: 25
## Starting species 253 ... accepted combinations: 25
## Starting species 254 ... accepted combinations: 25
## Starting species 255 ... accepted combinations: 25
## Starting species 256 ... accepted combinations: 25
## Starting species 257 ... accepted combinations: 25
## Starting species 258 ... accepted combinations: 25
## Starting species 259 ... accepted combinations: 25
## Starting species 260 ... accepted combinations: 25
## Starting species 261 ... accepted combinations: 25
## Starting species 262 ... accepted combinations: 25
## Starting species 263 ... accepted combinations: 25
## Starting species 264 ... accepted combinations: 25
## Starting species 265 ... accepted combinations: 25
## Starting species 266 ... accepted combinations: 25
## Starting species 267 ... accepted combinations: 25
## Starting species 268 ... accepted combinations: 25
## Starting species 269 ... accepted combinations: 25
## Starting species 270 ... accepted combinations: 25
## Starting species 271 ... accepted combinations: 25
## Starting species 272 ... accepted combinations: 25
## Starting species 273 ... accepted combinations: 25
## Number of valid combinations: 25
## Number of remaining species: 12
## Calculating statistical significance (permutational test)...
summary(awana_indicators)
## Result of 'indicators()':
##
## Number of plots in target site group: 80
## Number of candidate species: 273
## Number of species used in combinations: 273
## Number of indicators (single species or species combinations) kept: 25
## Group coverage: 81.25%
print(awana_indicators)
## A B sqrtIV p.value
## Lepsco+Schten 0.9090909 0.6250 0.7537784 0.005
## Schten 0.8771930 0.6250 0.7404361 0.005
## Olefur+Schten 0.9318182 0.5125 0.6910549 0.005
## Lepsco+Moraff 0.7391304 0.6375 0.6864369 0.005
## Moraff+Pteesc 0.9512195 0.4875 0.6809695 0.005
## Moraff+Schten 0.9736842 0.4625 0.6710655 0.005
## Pteesc+Schten 0.9736842 0.4625 0.6710655 0.005
## Hakser+Moraff 0.9047619 0.4750 0.6555623 0.005
## Leplat+Schten 0.9047619 0.4750 0.6555623 0.005
## Leufas+Schten 0.8510638 0.5000 0.6523281 0.005
## Hakser+Schten 0.9705882 0.4125 0.6327461 0.005
## Gonagg 0.9677419 0.3750 0.6024145 0.005
## Gonagg+Lepsco 0.9677419 0.3750 0.6024145 0.005
## Gonagg+Schten 1.0000000 0.3375 0.5809475 0.005
## Erilus+Schten 0.9642857 0.3375 0.5704791 0.005
## Gonagg+Olefur 1.0000000 0.3250 0.5700877 0.005
## Moraff+Olefur 0.7058824 0.4500 0.5636019 0.005
## Erilus+Pteesc 0.9032258 0.3500 0.5622535 0.005
## Gonagg+Leufas 1.0000000 0.3125 0.5590170 0.005
## Erilus+Moraff 0.7948718 0.3875 0.5549890 0.005
## Leplat+Moraff 0.7948718 0.3875 0.5549890 0.005
## Coprha+Schten 0.8484848 0.3500 0.5449493 0.005
## Gonagg+Leplat 0.9600000 0.3000 0.5366563 0.005
## Leplat+Pteesc 0.7209302 0.3875 0.5285456 0.005
## Schten+Kunrob 0.8181818 0.3375 0.5254868 0.005
We can estimate confidence limits for these statistics.
n_boot <- 19
awana_indicators_boot <- indicators(aotea_pa, cluster = aotea_site$code, group = "AW", At = 0.7, Bt = 0.3, max.order = 2, nboot.ci = n_boot, verbose = FALSE) # set verbose FALSE to turn off the output
summary(awana_indicators_boot)
## Result of 'indicators()':
##
## Number of plots in target site group: 80
## Number of candidate species: 273
## Number of species used in combinations: 273
## Number of indicators (single species or species combinations) kept: 25
## Group coverage: 81.25%
print(awana_indicators_boot$A) # inspect the A values
## stat lowerCI upperCI
## Coprha+Schten 0.8484848 0.6956522 0.9655172
## Erilus+Moraff 0.7948718 0.6585366 0.9000000
## Erilus+Pteesc 0.9032258 0.7083333 1.0000000
## Erilus+Schten 0.9642857 0.8571429 1.0000000
## Gonagg 0.9677419 0.9000000 1.0000000
## Gonagg+Leplat 0.9600000 0.8750000 1.0000000
## Gonagg+Lepsco 0.9677419 0.9000000 1.0000000
## Gonagg+Leufas 1.0000000 1.0000000 1.0000000
## Gonagg+Olefur 1.0000000 1.0000000 1.0000000
## Gonagg+Schten 1.0000000 1.0000000 1.0000000
## Hakser+Moraff 0.9047619 0.7894737 0.9743590
## Hakser+Schten 0.9705882 0.8684211 1.0000000
## Leplat+Moraff 0.7948718 0.6571429 0.8787879
## Leplat+Pteesc 0.7209302 0.5609756 0.8750000
## Leplat+Schten 0.9047619 0.7804878 0.9767442
## Lepsco+Moraff 0.7391304 0.6166667 0.8356164
## Lepsco+Schten 0.9090909 0.8181818 0.9622642
## Leufas+Schten 0.8510638 0.7500000 0.9302326
## Moraff+Olefur 0.7058824 0.5932203 0.8039216
## Moraff+Pteesc 0.9512195 0.8235294 1.0000000
## Moraff+Schten 0.9736842 0.9047619 1.0000000
## Olefur+Schten 0.9318182 0.8222222 1.0000000
## Pteesc+Schten 0.9736842 0.8857143 1.0000000
## Schten 0.8771930 0.7959184 0.9433962
## Schten+Kunrob 0.8181818 0.6666667 0.9285714